# Esta primera parte está adaptada de:
# http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs, make_moons, make_circles
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
%matplotlib inline
# Data preparation: four 2-D toy datasets of 500 samples each, stored as
# (X, y) tuples in `datasets`.

# Two blobs centred at (0, 0) and (2.5, 2.5).
simple = make_blobs(
    n_samples=500,
    n_features=2,
    centers=[[0, 0], [2.5, 2.5]],
    random_state=1,
)

# Two blobs centred at (0, 0) and (5, 1), then passed through a fixed
# 2x2 linear map.
X, y = make_blobs(
    n_samples=500,
    n_features=2,
    centers=[[0, 0], [5, 1]],
    random_state=1,
)
# Multiply with a plain ndarray and `@` instead of np.matrix: the product is
# numerically the same, but X stays an ndarray. np.matrix is discouraged by
# NumPy and recent scikit-learn releases reject it as estimator input.
X = X @ np.array([[1, -2], [-20, 10]])
linearly_separable = (X, y)

datasets = [
    simple,
    linearly_separable,
    make_moons(noise=0.1, random_state=0, n_samples=500),
    make_circles(noise=0.1, factor=0.5, random_state=1, n_samples=500),
]
# Baseline classifiers as (display name, estimator) pairs. A dict keeps each
# label next to its model; .items() yields the pairs in insertion order.
classifiers = list({
    "Naive Bayes": GaussianNB(),
    # n_neighbors: number of neighbours
    "Nearest Neighbors": KNeighborsClassifier(n_neighbors=1),
    # max_depth: maximum depth of the tree
    "Decision Tree": DecisionTreeClassifier(criterion='entropy', max_depth=2),
    # C: the higher the value, the weaker the regularisation
    "Logistic Regression": LogisticRegression(C=1e10, solver='lbfgs'),
    "Neural Network": MLPClassifier(
        hidden_layer_sizes=(50,),
        max_iter=1000,
        alpha=0,
    ),
}.items())
from p4_IA_aux import plot_classifiers
# Presumably draws every classifier on every dataset; plot_classifiers comes
# from p4_IA_aux, so confirm the exact behaviour there.
plot_classifiers(classifiers, datasets)

# List the parameters of the different classifiers by printing each estimator.
for _name, clf in classifiers:
    print(clf)
# Try changing the following parameters and observe the effect on the
# resulting classification boundary:
# Second experiment: the same kinds of models with altered hyper-parameters,
# again as (display name, estimator) pairs in insertion order.
classifiers = list({
    "Nearest Neighbors (even)": KNeighborsClassifier(n_neighbors=2),
    "Nearest Neighbors (odd)": KNeighborsClassifier(n_neighbors=3),
    # max_depth modified to 10
    "Decision Tree": DecisionTreeClassifier(criterion='entropy', max_depth=10),
    # extra hidden layers added
    "Neural Network": MLPClassifier(
        hidden_layer_sizes=(50, 20, 50),
        max_iter=1000,
        alpha=0,
    ),
}.items())

# Re-run the comparison with the modified classifiers on the same datasets.
plot_classifiers(classifiers, datasets)